{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Separation"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Validation Separation"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 留出法Hold-out\n",
    "使用前提：有足够的数据"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "%matplotlib inline\n",
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import sklearn.model_selection as ms\n",
    "\n",
    "# Seed NumPy's global RNG so the toy data is identical on every\n",
    "# Restart & Run All instead of changing with each execution.\n",
    "np.random.seed(42)\n",
    "\n",
    "# Toy dataset: 10 samples x 2 integer features, values drawn from [1, 100).\n",
    "X = np.random.randint(1, 100, size=(10, 2))\n",
    "print(X)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Repeated hold-out: every split reshuffles the row indices and holds out\n",
    "# 25% of them (rounded up, so 3 of the 10 rows) as the test set.\n",
    "# random_state pins the shuffles so the printed splits are repeatable.\n",
    "ss = ms.ShuffleSplit(n_splits=10, test_size=0.25, random_state=42)\n",
    "for train, test in ss.split(X):\n",
    "    print(f'train: {train} , test: {test}')"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "### K-fold"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "source": [
    "# sklearn.model_selection.KFold with n_splits=k: the rows are cut into k\n",
    "# consecutive folds (no shuffling by default) and each fold serves as the\n",
    "# test set exactly once.\n",
    "kf = ms.KFold(n_splits=5)\n",
    "for train, test in kf.split(X):\n",
    "    print(f'train: {train} , test: {test}')"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "train: [2 3 4 5 6 7 8 9] , test: [0 1]\n",
      "train: [0 1 4 5 6 7 8 9] , test: [2 3]\n",
      "train: [0 1 2 3 6 7 8 9] , test: [4 5]\n",
      "train: [0 1 2 3 4 5 8 9] , test: [6 7]\n",
      "train: [0 1 2 3 4 5 6 7] , test: [8 9]\n"
     ]
    }
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.6.8 64-bit ('base': conda)"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "interpreter": {
   "hash": "1fef96a86254b5b64b7294805a674893d583399788ea545149c5bfbe00efcc65"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}